
** Session setup: close any open log, clear memory, turn off output paging
** (stored permanently), and fix the random-number seed
cap log close
clear all
set more off, permanently
set seed 1234

/////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.6: Priority and P1 Subject-Specific Performance - All Applicants ///////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
** Opening Phase 1 norm_scores dataset
use "Work Data/Gender_Phase1_long.dta", clear

*** Creating variables
* sub: numeric, value-labelled copy of the string variable subject
encode subject, gen(sub)
label var sub "Subject"

** Subject dummies
* tab creates one indicator d_sub# per distinct value of subject.
* NOTE(review): the renames below are positional, i.e. they assume subject has
* exactly these seven values in this order. Confirm with the cross-tabs printed
* by the checking loop at the end of this section.
tab subject, gen(d_sub)
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Mathematics
rename d_sub6 Physics
rename d_sub7 Portuguese

* Labels
* Full variable names are used throughout; the previous "Math" only resolved to
* Mathematics through variable abbreviation and fails under set varabbrev off.
label var Biology "Biology"
label var Chemistry "Chemistry"
label var Geography "Geography"
label var History "History"
label var Mathematics "Mathematics"
label var Physics "Physics"
label var Portuguese "Portuguese"

* Checking: each dummy should equal 1 for exactly one value of subject
foreach v of varlist Biology-Portuguese {
    tab subject `v', mi
}

** Two-way interaction: female X priority
generate fem_priority = female*priority
label variable fem_priority "Female $\times$ Future Priority"

** Subject-specific interactions, one set per subject dummy:
** female X subject, priority X subject, and female X priority X subject
foreach s of varlist Biology-Portuguese {
    generate fem_`s' = `s'*female
    generate prio_`s' = priority*`s'
    generate fem_prio_`s' = fem_priority*`s'
    label variable fem_`s' "Female $\times$ `s'"
    label variable prio_`s' "Future priority $\times$ `s'"
    label variable fem_prio_`s' "Female $\times$ Future priority $\times$ `s'"
}

** Regressor lists used in the regressions further down.
** Biology and Portuguese are not part of either list.
global subject "Chemistry Geography History Mathematics Physics"
global subject_fem "fem_Chemistry fem_Geography fem_History fem_Mathematics fem_Physics"

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM

* Relative ENEM score: norm_enem_w minus its mean within each year-by-female cell.
* The cell mean is only a helper and is dropped once the deviation is built.
bysort year female: egen norm_enem_w_ave_g = mean(norm_enem_w)
generate norm_enem_w_g = norm_enem_w - norm_enem_w_ave_g
bysort year female: summarize norm_enem_w_g
drop norm_enem_w_ave_g

* Interaction: subject X ENEM and female X subject X ENEM,
* each with its own powers 2 to 4 (suffixes _2, _3, _4)
foreach s of varlist Biology-Portuguese {
    generate enem_`s' = `s'*norm_enem_w_g
    label variable enem_`s' "ENEM scores $\times$ `s'"
    generate fem_enem_`s' = female*norm_enem_w_g*`s'
    label variable fem_enem_`s' "Female $\times$ ENEM scores $\times$ `s'"
    forvalues p = 2/4 {
        generate enem_`s'_`p' = enem_`s'^`p'
        generate fem_enem_`s'_`p' = fem_enem_`s'^`p'
    }
    summarize enem_`s'* fem_enem_`s'*
}

* Wildcard list: linear term and powers of subject X ENEM
* for Chemistry, Geography, History, Mathematics and Physics
global g_pol_enem_sub "enem_Chemistry* enem_Geography* enem_History* enem_Mathematics* enem_Physics*"
describe $g_pol_enem_sub

* Priority x relative performance in ENEM: linear term, then powers 2 to 4
* of the relative score multiplied by priority (suffixes g2, g3, g4)
generate norm_enem_w_priority_g = norm_enem_w_g*priority
forvalues p = 2/4 {
    generate norm_enem_w_priority_g`p' = norm_enem_w_g^`p'*priority
    summarize norm_enem_w_priority_g`p'
}

* Wildcard list covering the linear term and the three powers created above
global g_norm_enem_w_prio norm_enem_w_priority_g*
describe $g_norm_enem_w_prio

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Keep only years before affirmative action took place; year 2000 is also excluded
drop if aa_year==1 | year==2000
tabulate year

* 2) Drop Portuguese and Foreign Language rows: Phase 1 has no Portuguese or
*    Foreign Language exam (for Portuguese, Phase 1 has an essay instead)
drop if inlist(subject, "lang", "port")
* NOTE(review): only these three Portuguese variables are removed; the other
* Portuguese interactions created earlier in the file (fem_Portuguese,
* enem_Portuguese*, fem_enem_Portuguese*) stay in memory. Confirm this is intended.
drop Portuguese prio_Portuguese fem_prio_Portuguese

* All applicants: no restriction on pass_st1, tabulated here for reference
tabulate pass_st1
 
*********************************************************************************
****************   Regressions **************************************************
*********************************************************************************

 ***************************************************
 ****** Scores normalized by year and subject ******
 ***************************************************

** Labeling variables (labels appear in the exported tables)
label variable priority "Future Priority"
label variable female "Female"
label variable norm_enem_w_g "ENEM"

***  P1 score
* Five specifications of norm_score, all with standard errors clustered on inscri2.
* reg1 and reg2 are OLS; reg3 to reg5 absorb inscri2 fixed effects with reghdfe.

estimates clear

* reg1: female, priority, their interaction and relative ENEM
regress norm_score female priority fem_priority norm_enem_w_g, vce(cluster inscri2)
estimates store reg1

* reg2: adds subject dummies and female X subject
regress norm_score female priority fem_priority norm_enem_w_g $subject $subject_fem, ///
    vce(cluster inscri2)
estimates store reg2

* reg3: inscri2 fixed effects; female and relative ENEM are no longer included
reghdfe norm_score priority fem_priority $subject $subject_fem, ///
    absorb(inscri2) vce(cluster inscri2)
estimates store reg3

* reg4: adds the subject X ENEM polynomial terms
reghdfe norm_score priority fem_priority $subject $subject_fem $g_pol_enem_sub, ///
    absorb(inscri2) vce(cluster inscri2)
estimates store reg4

* reg5: adds the priority X ENEM polynomial terms
reghdfe norm_score priority fem_priority $subject $subject_fem $g_pol_enem_sub $g_norm_enem_w_prio, ///
    absorb(inscri2) vce(cluster inscri2)
estimates store reg5

* Yes/No footer flags attached to the stored estimates; they are printed through
* the stats() option of the esttab calls at the end of the file

* Subject and subject-gender controls: absent only in reg1
foreach flag in sub_fe subgender_fe {
    estadd local `flag' "No":  reg1
    estadd local `flag' "Yes": reg2 reg3 reg4 reg5
}

* Individual fixed effects: reg3 onwards
estadd local ind_fe "No": reg1 reg2
estadd local ind_fe "Yes": reg3 reg4 reg5

* Subject X ENEM polynomial terms: reg4 onwards
estadd local enemsub "No":  reg1 reg2 reg3
estadd local enemsub "Yes":  reg4 reg5

* Priority X ENEM polynomial terms: reg5 only
estadd local enemprio_pol4 "No":  reg1 reg2 reg3 reg4
estadd local enemprio_pol4 "Yes": reg5


* Tex, version 1: reports the Female X subject coefficients in the table body,
* so the footer has no separate subject-gender row.
* NOTE(review): fmt() lists fewer formats than stats() has entries in both calls;
* this relies on estout reusing the last format for the remaining statistics - confirm.
esttab reg1 reg2 reg3 reg4 reg5 using "Output/general_result_phase1score_all-applicants.tex", ///
    se star(* 0.10 ** 0.05 *** 0.01) nogap ///
    stats(r2_a N N_clust sep sub_fe ind_fe enemsub enemprio_pol4, ///
        fmt(%9.3fc %9.0fc %9.0fc %1s %3s %3s %3s) ///
        labels("$\bar{R}^2$" "Number of observations"  "Number of applicants" " " ///
            "Subject FE" "Individual FE" "ENEM $\times$ Subject FE" "ENEM $\times$ Future Priority")) ///
    b(%7.3f) se(%7.3f)  booktabs replace f label nomtitle collabels(none) ///
    keep(female priority fem_priority norm_enem_w_g $subject_fem) ///
    refcat(female " \\ \multicolumn{5}{l}{\textit{Dependent variable: Phase 1 normalized subject-specific scores}} \\", nolabel)

* Tex, version 2 (file prefix p_): keeps only the four main coefficients and
* reports the subject-gender controls as a Yes/No footer row instead.
esttab reg1 reg2 reg3 reg4 reg5 using "Output/p_general_result_phase1score_all-applicants.tex", ///
    se star(* 0.10 ** 0.05 *** 0.01) nogap ///
    stats(r2_a N N_clust sep sub_fe subgender_fe ind_fe enemsub enemprio_pol4 , ///
        fmt(%9.3fc %9.0fc %9.0fc %1s %3s %3s %3s %3s) ///
        labels("$\bar{R}^2$" "Number of observations"  "Number of applicants" " " ///
            "Subject FE" "Subject-gender FE" ///
            "Individual FE" "ENEM $\times$ Subject FE" "ENEM $\times$ Future Priority")) ///
    b(%7.3f) se(%7.3f)  booktabs replace f label nomtitle collabels(none) ///
    keep(female priority fem_priority norm_enem_w_g) ///
    refcat(female " \\ \multicolumn{5}{l}{\textit{Dependent variable: Phase 1 normalized subject-specific scores}} \\", nolabel)